
Introduction¶

This is a Colab notebook by Li Fang for learning PyTorch basics. By the end of the notebook, you should be able to run basic machine-learning operations in PyTorch and generate a few plots. We should collaboratively edit this notebook to make it more informative and more user-friendly.

PyTorch basics¶

In [ ]:
!pip install torch==1.7.0+cu101 torchvision==0.8.1+cu101 torchaudio===0.7.0 -f https://download.pytorch.org/whl/torch_stable.html
Looking in links: https://download.pytorch.org/whl/torch_stable.html
Collecting torch==1.7.0+cu101
  Downloading https://download.pytorch.org/whl/cu101/torch-1.7.0%2Bcu101-cp36-cp36m-linux_x86_64.whl (735.3MB)
     |████████████████████████████████| 735.3MB 23kB/s 
Collecting torchvision==0.8.1+cu101
  Downloading https://download.pytorch.org/whl/cu101/torchvision-0.8.1%2Bcu101-cp36-cp36m-linux_x86_64.whl (12.8MB)
     |████████████████████████████████| 12.8MB 182kB/s 
Collecting torchaudio===0.7.0
  Downloading https://files.pythonhosted.org/packages/3f/23/6b54106b3de029d3f10cf8debc302491c17630357449c900d6209665b302/torchaudio-0.7.0-cp36-cp36m-manylinux1_x86_64.whl (7.6MB)
     |████████████████████████████████| 7.6MB 3.8MB/s 
Requirement already satisfied: future in /usr/local/lib/python3.6/dist-packages (from torch==1.7.0+cu101) (0.16.0)
Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from torch==1.7.0+cu101) (1.18.5)
Requirement already satisfied: typing-extensions in /usr/local/lib/python3.6/dist-packages (from torch==1.7.0+cu101) (3.7.4.3)
Requirement already satisfied: dataclasses in /usr/local/lib/python3.6/dist-packages (from torch==1.7.0+cu101) (0.7)
Requirement already satisfied: pillow>=4.1.1 in /usr/local/lib/python3.6/dist-packages (from torchvision==0.8.1+cu101) (7.0.0)
Installing collected packages: torch, torchvision, torchaudio
  Found existing installation: torch 1.6.0+cu101
    Uninstalling torch-1.6.0+cu101:
      Successfully uninstalled torch-1.6.0+cu101
  Found existing installation: torchvision 0.7.0+cu101
    Uninstalling torchvision-0.7.0+cu101:
      Successfully uninstalled torchvision-0.7.0+cu101
Successfully installed torch-1.7.0+cu101 torchaudio-0.7.0 torchvision-0.8.1+cu101
In [ ]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt

1. Autograd: Automatic Differentiation¶

The autograd package provides automatic differentiation for all operations on Tensors.

torch.Tensor is the central class of PyTorch. If you set its attribute .requires_grad to True, it starts to track all operations on it. When you finish your computation, you can call .backward() to have all the gradients computed automatically. The gradient for this tensor will be accumulated into its .grad attribute.

In [ ]:
# Create tensors.
x = torch.tensor(1., requires_grad=True)
w = torch.tensor(2., requires_grad=True)
b = torch.tensor(3., requires_grad=True)

print(x)
print(w)
print(b)
tensor(1., requires_grad=True)
tensor(2., requires_grad=True)
tensor(3., requires_grad=True)
In [ ]:
# Build a computational graph.
y = w * x + b    # y = 2 * 1 + 3 = 5

# Compute gradients.
y.backward()

# Print out the gradients.
print(x.grad)    # dy/dx = w = 2
print(w.grad)    # dy/dw = x = 1
print(b.grad)    # dy/db = 1
tensor(2.)
tensor(1.)
tensor(1.)
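
Because gradients accumulate in the .grad attribute, a second call to .backward() adds the new gradient to the existing one. Below is a minimal sketch (not from the original notebook) illustrating the accumulation and how to reset it in place:

In [ ]:
# Gradients accumulate across backward() calls
x = torch.tensor(1., requires_grad=True)

y = 2 * x
y.backward()
print(x.grad)    # tensor(2.)

y = 2 * x
y.backward()
print(x.grad)    # tensor(4.): the new gradient was added to the old one

x.grad.zero_()   # reset the accumulated gradient in place
print(x.grad)    # tensor(0.)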

2. Linear regression¶

In [ ]:
# Toy dataset
x_train = np.array([[3.3], [4.4], [5.5], [6.71], [6.93], [4.168], 
                    [9.779], [6.182], [7.59], [2.167], [7.042], 
                    [10.791], [5.313], [7.997], [3.1]], dtype=np.float32)

y_train = np.array([[1.7], [2.76], [2.09], [3.19], [1.694], [1.573], 
                    [3.366], [2.596], [2.53], [1.221], [2.827], 
                    [3.465], [1.65], [2.904], [1.3]], dtype=np.float32)
plt.plot(x_train, y_train, 'o')
Out[ ]:
[<matplotlib.lines.Line2D at 0x7fe9fc5a14a8>]
In [ ]:
# Hyper-parameters
input_size = 1
output_size = 1
num_epochs = 100
learning_rate = 0.001
In [ ]:
model = nn.Linear(input_size, output_size)
In [ ]:
# Loss and optimizer
loss = nn.MSELoss() #  mean squared error loss
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) # stochastic gradient descent 
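
As a quick check (not in the original notebook), nn.MSELoss computes the mean of the squared differences between predictions and targets:

In [ ]:
# Verify the loss by hand on a toy pair of tensors
a = torch.tensor([1., 2., 3.])
b = torch.tensor([1., 2., 5.])
print(loss(a, b))              # tensor(1.3333)
print(((a - b) ** 2).mean())   # the same value, computed manually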
In [ ]:
# Train the model
for epoch in range(num_epochs):
    # Convert numpy arrays to torch tensors
    inputs  = torch.from_numpy(x_train)
    targets = torch.from_numpy(y_train)

    # Forward pass
    outputs = model(inputs) # model = nn.Linear(input_size, output_size)
    l = loss(outputs, targets)
    
    # Backward and optimize
    optimizer.zero_grad() # clear the gradients associated with the tensors
    l.backward()
    optimizer.step() # apply the optimization on the parameters
    
    if (epoch+1) % 5 == 0:
        print ('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, l.item()))
Epoch [5/100], Loss: 0.1736
Epoch [10/100], Loss: 0.1735
Epoch [15/100], Loss: 0.1735
Epoch [20/100], Loss: 0.1735
Epoch [25/100], Loss: 0.1735
Epoch [30/100], Loss: 0.1735
Epoch [35/100], Loss: 0.1735
Epoch [40/100], Loss: 0.1735
Epoch [45/100], Loss: 0.1735
Epoch [50/100], Loss: 0.1735
Epoch [55/100], Loss: 0.1734
Epoch [60/100], Loss: 0.1734
Epoch [65/100], Loss: 0.1734
Epoch [70/100], Loss: 0.1734
Epoch [75/100], Loss: 0.1734
Epoch [80/100], Loss: 0.1734
Epoch [85/100], Loss: 0.1734
Epoch [90/100], Loss: 0.1734
Epoch [95/100], Loss: 0.1734
Epoch [100/100], Loss: 0.1733
In [ ]:
# Plot the graph
predicted = model(torch.from_numpy(x_train)).detach().numpy()
plt.plot(x_train, y_train, 'ro', label='Original data')
plt.plot(x_train, predicted, label='Fitted line')
plt.legend()
plt.show()

# Save the model checkpoint
torch.save(model.state_dict(), 'linear_model.ckpt')
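
After training, the learned weight and bias can be inspected directly, and the checkpoint saved above can be reloaded into a fresh model. A short sketch (the exact values depend on the random initialization):

In [ ]:
# Inspect the learned parameters of the linear model
print(model.weight.item(), model.bias.item())

# Reload the saved checkpoint into a new nn.Linear model
model2 = nn.Linear(input_size, output_size)
model2.load_state_dict(torch.load('linear_model.ckpt'))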

3. Feedforward neural network¶

In [ ]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
In [ ]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
cuda

The MNIST database (Modified National Institute of Standards and Technology database) is a large database of handwritten digits that is commonly used for training various image processing systems.

The MNIST database contains 60,000 training images and 10,000 testing images.

Image size: 28×28 pixels (784 values per image)

We will be using the MNIST handwritten digit dataset.

workshop2.png

In [ ]:
# Hyper-parameters 
input_size = 784
hidden_size = 500
num_classes = 10
num_epochs = 10
batch_size = 100
learning_rate = 0.001
In [ ]:
# MNIST dataset 
train_dataset = torchvision.datasets.MNIST(root='../../data', 
                                           train=True, 
                                           transform=transforms.ToTensor(),  
                                           download=True)

test_dataset = torchvision.datasets.MNIST(root='../../data', 
                                          train=False, 
                                          transform=transforms.ToTensor())
In [ ]:
!ls ../../data/MNIST/*/*
../../data/MNIST/processed/test.pt
../../data/MNIST/processed/training.pt
../../data/MNIST/raw/t10k-images-idx3-ubyte
../../data/MNIST/raw/t10k-images-idx3-ubyte.gz
../../data/MNIST/raw/t10k-labels-idx1-ubyte
../../data/MNIST/raw/t10k-labels-idx1-ubyte.gz
../../data/MNIST/raw/train-images-idx3-ubyte
../../data/MNIST/raw/train-images-idx3-ubyte.gz
../../data/MNIST/raw/train-labels-idx1-ubyte
../../data/MNIST/raw/train-labels-idx1-ubyte.gz
In [ ]:
# Data loader
# The DataLoader shuffles the data and splits it into batches
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                           batch_size=batch_size, 
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 
                                          batch_size=batch_size, 
                                          shuffle=False)
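
As an optional sanity check (not in the original notebook), we can fetch one batch from the loader and confirm its shape before training:

In [ ]:
images, labels = next(iter(train_loader))
print(images.shape)   # expected: torch.Size([100, 1, 28, 28])
print(labels.shape)   # expected: torch.Size([100])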

torch.nn.Module¶

https://pytorch.org/docs/stable/generated/torch.nn.Module.html

Base class for all neural network modules.

In [ ]:
# Fully connected neural network with one hidden layer
class FCNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(FCNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size) # from input layer to the hidden layer
        self.relu = nn.ReLU() # activation function, adding non-linearity
        self.fc2 = nn.Linear(hidden_size, num_classes)
    
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

workshop3.jpg

workshop4.jpg

workshop5.jpg

workshop6.jpg

In [ ]:
model = FCNet(input_size, hidden_size, num_classes).to(device) # instantiate the model and move its parameters to the device (GPU, if available)

# Loss and optimizer
loss = nn.CrossEntropyLoss() # nn.CrossEntropyLoss() combines nn.LogSoftmax() and nn.NLLLoss() (negative log likelihood loss)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  
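
As a quick check (not in the original notebook), nn.CrossEntropyLoss applied to raw logits gives the same value as nn.NLLLoss applied to log-softmax outputs:

In [ ]:
# Compare the two formulations on a toy batch of raw scores
logits = torch.randn(4, num_classes)
targets = torch.randint(0, num_classes, (4,))
print(nn.CrossEntropyLoss()(logits, targets))
print(nn.NLLLoss()(nn.LogSoftmax(dim=1)(logits), targets))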
In [ ]:
# Train the model
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):  
        # Move tensors to the configured device
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model(images)
        l = loss(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad() # set the gradients to 0
        l.backward() # calculate gradients
        optimizer.step() # modify the parameters
        
        if (i+1) % 100 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, total_step, l.item()))
Epoch [1/10], Step [100/600], Loss: 0.3612
Epoch [1/10], Step [200/600], Loss: 0.2272
Epoch [1/10], Step [300/600], Loss: 0.1909
Epoch [1/10], Step [400/600], Loss: 0.2317
Epoch [1/10], Step [500/600], Loss: 0.2060
Epoch [1/10], Step [600/600], Loss: 0.1880
Epoch [2/10], Step [100/600], Loss: 0.1032
Epoch [2/10], Step [200/600], Loss: 0.0818
Epoch [2/10], Step [300/600], Loss: 0.0568
Epoch [2/10], Step [400/600], Loss: 0.0568
Epoch [2/10], Step [500/600], Loss: 0.0617
Epoch [2/10], Step [600/600], Loss: 0.1426
Epoch [3/10], Step [100/600], Loss: 0.0441
Epoch [3/10], Step [200/600], Loss: 0.0752
Epoch [3/10], Step [300/600], Loss: 0.1253
Epoch [3/10], Step [400/600], Loss: 0.0884
Epoch [3/10], Step [500/600], Loss: 0.0288
Epoch [3/10], Step [600/600], Loss: 0.1125
Epoch [4/10], Step [100/600], Loss: 0.0393
Epoch [4/10], Step [200/600], Loss: 0.0648
Epoch [4/10], Step [300/600], Loss: 0.0798
Epoch [4/10], Step [400/600], Loss: 0.0154
Epoch [4/10], Step [500/600], Loss: 0.0378
Epoch [4/10], Step [600/600], Loss: 0.0488
Epoch [5/10], Step [100/600], Loss: 0.0701
Epoch [5/10], Step [200/600], Loss: 0.0537
Epoch [5/10], Step [300/600], Loss: 0.0895
Epoch [5/10], Step [400/600], Loss: 0.0507
Epoch [5/10], Step [500/600], Loss: 0.0840
Epoch [5/10], Step [600/600], Loss: 0.0265
Epoch [6/10], Step [100/600], Loss: 0.0122
Epoch [6/10], Step [200/600], Loss: 0.0096
Epoch [6/10], Step [300/600], Loss: 0.0071
Epoch [6/10], Step [400/600], Loss: 0.0198
Epoch [6/10], Step [500/600], Loss: 0.0129
Epoch [6/10], Step [600/600], Loss: 0.0198
Epoch [7/10], Step [100/600], Loss: 0.0835
Epoch [7/10], Step [200/600], Loss: 0.0147
Epoch [7/10], Step [300/600], Loss: 0.0143
Epoch [7/10], Step [400/600], Loss: 0.0046
Epoch [7/10], Step [500/600], Loss: 0.0234
Epoch [7/10], Step [600/600], Loss: 0.0165
Epoch [8/10], Step [100/600], Loss: 0.0090
Epoch [8/10], Step [200/600], Loss: 0.0278
Epoch [8/10], Step [300/600], Loss: 0.0131
Epoch [8/10], Step [400/600], Loss: 0.0442
Epoch [8/10], Step [500/600], Loss: 0.0797
Epoch [8/10], Step [600/600], Loss: 0.0131
Epoch [9/10], Step [100/600], Loss: 0.0043
Epoch [9/10], Step [200/600], Loss: 0.0353
Epoch [9/10], Step [300/600], Loss: 0.0130
Epoch [9/10], Step [400/600], Loss: 0.0438
Epoch [9/10], Step [500/600], Loss: 0.0043
Epoch [9/10], Step [600/600], Loss: 0.0052
Epoch [10/10], Step [100/600], Loss: 0.0054
Epoch [10/10], Step [200/600], Loss: 0.0225
Epoch [10/10], Step [300/600], Loss: 0.0052
Epoch [10/10], Step [400/600], Loss: 0.0018
Epoch [10/10], Step [500/600], Loss: 0.0087
Epoch [10/10], Step [600/600], Loss: 0.0087
In [ ]:
# Test the model
# In the test phase, we don't need to compute gradients (for memory efficiency)
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1) # torch.max returns (max values, max indices); keep the indices
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))

# Save the model checkpoint
torch.save(model.state_dict(), 'FC_model.ckpt')
Accuracy of the network on the 10000 test images: 98.25 %
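
A hedged sketch of using the saved checkpoint for inference on a single test image (the prediction will vary with training):

In [ ]:
# Reload the checkpoint into a fresh network and classify one test image
model2 = FCNet(input_size, hidden_size, num_classes).to(device)
model2.load_state_dict(torch.load('FC_model.ckpt'))
model2.eval()

image, label = test_dataset[0]
with torch.no_grad():
    output = model2(image.reshape(-1, 28*28).to(device))
    predicted = output.argmax(dim=1).item()
print('predicted:', predicted, 'actual:', label)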

4. Decision boundaries of deep learning models¶

Deep learning uses a combination of many simple functions to approximate a complex function.

Specifically, if you use ReLU as the activation function, you are combining multiple piecewise-linear functions to approximate a complex function.

Example: A function (blue) and a piecewise linear approximation to it (red)

image.png

Next, we will present an example of using a feedforward neural network for binary classification, where the decision boundary is a circle: $$x^2 + y^2 = 1$$

4.1 Generation of training examples¶

In [ ]:
import torch
import torch.nn as nn

import torchvision
import torchvision.transforms as transforms

import numpy as np 
import matplotlib.pyplot as plt
In [ ]:
num_samples = 100000
n_input_dim = 2
x = np.random.rand(num_samples, n_input_dim) - [0.5, 0.5]
x = x * 3

y = np.zeros(num_samples)
for i in range(len(x)):
    if x[i][0] ** 2 + x[i][1] ** 2 < 1:  # label 1 inside the unit circle, 0 outside
        y[i] = 1

plt.figure(figsize=(15,15))
plt.scatter(x[:,0], x[:,1], c=y, s=1)
Out[ ]:
<matplotlib.collections.PathCollection at 0x7f4c2615ba58>
In [ ]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

# Training data
points = torch.from_numpy(x).float().to(device)
labels = torch.from_numpy(y).long().to(device)
print(points.shape)
print(labels.shape)
cuda
torch.Size([100000, 2])
torch.Size([100000])

4.2 The model (two input units, one hidden layer, two output units)¶

In [ ]:
class FCNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(FCNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size) # from input layer to the hidden layer
        self.relu = nn.ReLU() # activation function, adding non-linearity
        self.fc2 = nn.Linear(hidden_size, num_classes)
    
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

# Hyper-parameters 
input_size = 2
num_classes = 2
num_epochs = 2000
batch_size = 100
learning_rate = 0.001

4.3 Hidden layer size = 100¶

In [ ]:
import time


# Hyper-parameters 
hidden_size = 100

model = FCNet(input_size, hidden_size, num_classes).to(device) # instantiate the model, move the parameters to device(GPU)

# Loss and optimizer
loss = nn.CrossEntropyLoss() # nn.CrossEntropyLoss() combines nn.LogSoftmax() and nn.NLLLoss() (negative log likelihood loss)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  

start = time.time()
# Train the model
for epoch in range(num_epochs):
    # Forward pass (points and labels are already tensors on the device)
    outputs = model(points)
    l = loss(outputs, labels)
    
    # Backward and optimize
    optimizer.zero_grad() # clear the gradients associated with the tensors
    l.backward()
    optimizer.step() # apply the optimization on the parameters
    if epoch % 1000 == 0:
        print ('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, l.item()))
        
end = time.time()
print('time used:', end - start)
with torch.no_grad():
    labels = labels.to(device)
    outputs = model(points)
    _, predicted = torch.max(outputs.data, 1) # torch.max returns (max values, max indices); keep the indices
Epoch [1/2000], Loss: 0.6593
Epoch [1001/2000], Loss: 0.0553
time used: 10.487798929214478
In [ ]:
# plot the prediction results
plt.figure(figsize=(15,15))
predicted = predicted.cpu()
plt.scatter(x[:,0], x[:,1], c=predicted, s=1)
Out[ ]:
<matplotlib.collections.PathCollection at 0x7f1b3820f860>

4.4 Different hidden layer sizes¶

dl_workshop1.jpg
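
A sketch of how a figure like the one above could be reproduced, reusing the FCNet class, the training data (points, labels, x), and the hyper-parameters from the cells above; the exact hidden layer sizes behind the figure are an assumption here:

In [ ]:
# Train one model per hidden layer size and plot its predictions
for hidden_size in [2, 5, 20, 100]:    # assumed sizes, for illustration
    model = FCNet(input_size, hidden_size, num_classes).to(device)
    loss = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    for epoch in range(num_epochs):
        l = loss(model(points), labels)
        optimizer.zero_grad()
        l.backward()
        optimizer.step()
    with torch.no_grad():
        _, predicted = torch.max(model(points), 1)
    plt.figure(figsize=(5, 5))
    plt.scatter(x[:, 0], x[:, 1], c=predicted.cpu(), s=1)
    plt.title('hidden_size = {}'.format(hidden_size))
    plt.show()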

4.5 The Gaussian Error Linear Units (GELU) activation function (a smoothed version of ReLU)¶

image.png

$$ \text{GELU}(x) = xP(X\leq x) = x \Phi(x) = x \cdot \frac{1}{2} [1 + \text{erf}(x/\sqrt{2})], $$

where $\Phi(x)$ is the cumulative distribution function of the standard Gaussian distribution.

image.png

dl_workshop2.jpg

GELU is used in many language models such as GPT and BERT.
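
As a sketch (not from the original notebook), GELU can be swapped into the feedforward network above simply by replacing nn.ReLU() with nn.GELU(), which is available in recent PyTorch releases: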

In [ ]:
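# A sketch: the same feedforward network with GELU instead of ReLU
class FCNetGELU(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(FCNetGELU, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.gelu = nn.GELU()   # smooth activation, replaces nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.fc1(x)
        out = self.gelu(out)
        out = self.fc2(out)
        return out

model = FCNetGELU(input_size, hidden_size, num_classes).to(device)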